{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "7bd6a19a-12d1-463a-8b7d-2c834d78c7d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"5\"\n",
    "model_name_or_path = \"openai/whisper-small\"  \n",
    "#model_dir = \"models/whisper-small-asr-int8\"\n",
    "model_dir = \"/opt/cuijq/model/whisper-large-v2-asr-int8\"\n",
    "language = \"Chinese (China)\"\n",
    "language_abbr = \"zh-CN\"\n",
    "task = \"transcribe\"\n",
    "#dataset_name  = \"/opt/cuijq/common_voice_11_0\" \n",
    "dataset_name = \"mozilla-foundation/common_voice_11_0\"\n",
    "batch_size=8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5bc3d7b-64da-4241-9fe5-04414941abb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import login\n",
    "from datasets import load_dataset, DatasetDict\n",
    "login()\n",
    "common_voice = DatasetDict()\n",
    "common_voice[\"train\"] = load_dataset(\n",
    "    \"mozilla-foundation/common_voice_16_1\", \n",
    "    language_abbr, \n",
    "    split=\"train\",\n",
    "    token=True  # Ensures authentication is used\n",
    ")\n",
    "common_voice[\"validation\"] = load_dataset(\n",
    "    \"mozilla-foundation/common_voice_16_1\", \n",
    "    language_abbr, \n",
    "    split=\"validation\",\n",
    "    streaming=True,\n",
    "    token=True\n",
    ")\n",
    "print(common_voice[\"train\"][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6551d3d6-6abd-467b-a167-f0d329caabf9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    \n",
       "})"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "common_voice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "b6dc9c4b-3500-4808-831b-cb4e3f6103c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoFeatureExtractor, AutoTokenizer, AutoProcessor\n",
    "\n",
    "# 从预训练模型加载特征提取器\n",
    "feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)\n",
    "\n",
    "# 从预训练模型加载分词器，可以指定语言和任务以获得最适合特定需求的分词器配置\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, language=language, task=task)\n",
    "\n",
    "# 从预训练模型加载处理器，处理器通常结合了特征提取器和分词器，为特定任务提供一站式的数据预处理\n",
    "processor = AutoProcessor.from_pretrained(model_name_or_path, language=language, task=task)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "3093519f-74df-4e03-8e0f-9387286b7d13",
   "metadata": {},
   "outputs": [],
   "source": [
    "common_voice = common_voice.remove_columns(\n",
    "    [\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"path\", \"segment\", \"up_votes\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c11012b2-6d3e-4dc5-bf31-c6f2cb8d1416",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'train'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcommon_voice\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/datasets/dataset_dict.py:82\u001b[0m, in \u001b[0;36mDatasetDict.__getitem__\u001b[0;34m(self, k)\u001b[0m\n\u001b[1;32m     80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, k) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dataset:\n\u001b[1;32m     81\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(k, (\u001b[38;5;28mstr\u001b[39m, NamedSplit)) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m---> 82\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     83\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     84\u001b[0m         available_suggested_splits \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m     85\u001b[0m             split \u001b[38;5;28;01mfor\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m (Split\u001b[38;5;241m.\u001b[39mTRAIN, Split\u001b[38;5;241m.\u001b[39mTEST, Split\u001b[38;5;241m.\u001b[39mVALIDATION) \u001b[38;5;28;01mif\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m     86\u001b[0m         ]\n",
      "\u001b[0;31mKeyError\u001b[0m: 'train'"
     ]
    }
   ],
   "source": [
    "common_voice[\"train\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "39636e82-4a91-4264-a337-75df96881191",
   "metadata": {},
   "outputs": [],
   "source": [
    "language = \"Chinese (China)\"\n",
    "language_abbr = \"zh-CN\"\n",
    "language_decode = \"chinese\"\n",
    "task = \"transcribe\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3a8684c5-6c1e-44e3-aeb8-7eabb8a9ab41",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Can't find 'adapter_config.json' at 'models/whisper-small-asr-int8'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mHTTPError\u001b[0m                                 Traceback (most recent call last)",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_http.py:409\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    408\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 409\u001b[0m     \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/requests/models.py:1026\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1025\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1026\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
      "\u001b[0;31mHTTPError\u001b[0m: 401 Client Error: Unauthorized for url: https://huggingface.co/models/whisper-small-asr-int8/resolve/main/adapter_config.json",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mRepositoryNotFoundError\u001b[0m                   Traceback (most recent call last)",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/peft/config.py:200\u001b[0m, in \u001b[0;36mPeftConfigMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m    199\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 200\u001b[0m     config_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    201\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhf_hub_download_kwargs\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    203\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1010\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m   1009\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1010\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1011\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m   1012\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1013\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m   1014\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1015\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1016\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1017\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1018\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m   1019\u001b[0m \u001b[43m        \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1020\u001b[0m \u001b[43m        \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1021\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1022\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1023\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1024\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m   1025\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1026\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1027\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1117\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m   1116\u001b[0m     \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[0;32m-> 1117\u001b[0m     \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1119\u001b[0m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1658\u001b[0m, in \u001b[0;36m_raise_on_head_call_error\u001b[0;34m(head_call_error, force_download, local_files_only)\u001b[0m\n\u001b[1;32m   1653\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m   1654\u001b[0m     \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m401\u001b[39m\n\u001b[1;32m   1655\u001b[0m ):\n\u001b[1;32m   1656\u001b[0m     \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[1;32m   1657\u001b[0m     \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1658\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m   1659\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1660\u001b[0m     \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1546\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[0;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[1;32m   1545\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1546\u001b[0m     metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1547\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\n\u001b[1;32m   1548\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1549\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1463\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers, endpoint)\u001b[0m\n\u001b[1;32m   1462\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1463\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1464\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1465\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1466\u001b[0m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1467\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1468\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1469\u001b[0m \u001b[43m    \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1470\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1471\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1472\u001b[0m hf_raise_for_status(r)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:286\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    285\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 286\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    287\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    288\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    289\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    290\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    291\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    293\u001b[0m     \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m    294\u001b[0m     \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:310\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    309\u001b[0m response \u001b[38;5;241m=\u001b[39m http_backoff(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams, retry_on_exceptions\u001b[38;5;241m=\u001b[39m(), retry_on_status_codes\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m429\u001b[39m,))\n\u001b[0;32m--> 310\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_http.py:459\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    450\u001b[0m     message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    451\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    452\u001b[0m         \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    457\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m https://huggingface.co/docs/huggingface_hub/authentication\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    458\u001b[0m     )\n\u001b[0;32m--> 459\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    461\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
      "\u001b[0;31mRepositoryNotFoundError\u001b[0m: 401 Client Error. (Request ID: Root=1-68ab3557-2d815e6960b0885c77189507;177f303c-1a72-4b5b-96d3-63630a4ae3b5)\n\nRepository Not Found for url: https://huggingface.co/models/whisper-small-asr-int8/resolve/main/adapter_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication\nInvalid username or password.",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoProcessor\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpeft\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PeftConfig, PeftModel\n\u001b[0;32m----> 4\u001b[0m peft_config \u001b[38;5;241m=\u001b[39m \u001b[43mPeftConfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m base_model \u001b[38;5;241m=\u001b[39m AutoModelForSpeechSeq2Seq\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[1;32m      7\u001b[0m     peft_config\u001b[38;5;241m.\u001b[39mbase_model_name_or_path, load_in_8bit\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, device_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      8\u001b[0m )\n\u001b[1;32m     10\u001b[0m peft_model \u001b[38;5;241m=\u001b[39m PeftModel\u001b[38;5;241m.\u001b[39mfrom_pretrained(base_model, model_dir)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/peft/config.py:204\u001b[0m, in \u001b[0;36mPeftConfigMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m    200\u001b[0m         config_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[1;32m    201\u001b[0m             pretrained_model_name_or_path, CONFIG_NAME, subfolder\u001b[38;5;241m=\u001b[39msubfolder, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhf_hub_download_kwargs\n\u001b[1;32m    202\u001b[0m         )\n\u001b[1;32m    203\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 204\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCONFIG_NAME\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m at \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m    206\u001b[0m loaded_attributes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mfrom_json_file(config_file)\n\u001b[1;32m    207\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mclass_kwargs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mloaded_attributes}\n",
      "\u001b[0;31mValueError\u001b[0m: Can't find 'adapter_config.json' at 'models/whisper-small-asr-int8'"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoProcessor\n",
    "from peft import PeftConfig, PeftModel\n",
    "\n",
    "peft_config = PeftConfig.from_pretrained(model_dir)\n",
    "\n",
    "base_model = AutoModelForSpeechSeq2Seq.from_pretrained(\n",
    "    peft_config.base_model_name_or_path, load_in_8bit=True, device_map=\"auto\"\n",
    ")\n",
    "\n",
    "peft_model = PeftModel.from_pretrained(base_model, model_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "70f5377b-867b-4029-a6e0-cb521c503097",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Can't find 'adapter_config.json' at 'models/whisper-small-asr-int8'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mHTTPError\u001b[0m                                 Traceback (most recent call last)",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_http.py:409\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    408\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 409\u001b[0m     \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/requests/models.py:1026\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1025\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1026\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
      "\u001b[0;31mHTTPError\u001b[0m: 401 Client Error: Unauthorized for url: https://huggingface.co/models/whisper-small-asr-int8/resolve/main/adapter_config.json",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mRepositoryNotFoundError\u001b[0m                   Traceback (most recent call last)",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/peft/config.py:200\u001b[0m, in \u001b[0;36mPeftConfigMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m    199\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 200\u001b[0m     config_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    201\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhf_hub_download_kwargs\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    203\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1010\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m   1009\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1010\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1011\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m   1012\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1013\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m   1014\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1015\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1016\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1017\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1018\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m   1019\u001b[0m \u001b[43m        \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1020\u001b[0m \u001b[43m        \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1021\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1022\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1023\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1024\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m   1025\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1026\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1027\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1117\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m   1116\u001b[0m     \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[0;32m-> 1117\u001b[0m     \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1119\u001b[0m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1658\u001b[0m, in \u001b[0;36m_raise_on_head_call_error\u001b[0;34m(head_call_error, force_download, local_files_only)\u001b[0m\n\u001b[1;32m   1653\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m   1654\u001b[0m     \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m401\u001b[39m\n\u001b[1;32m   1655\u001b[0m ):\n\u001b[1;32m   1656\u001b[0m     \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[1;32m   1657\u001b[0m     \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1658\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m   1659\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1660\u001b[0m     \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1546\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[0;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[1;32m   1545\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1546\u001b[0m     metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1547\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\n\u001b[1;32m   1548\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1549\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1463\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers, endpoint)\u001b[0m\n\u001b[1;32m   1462\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1463\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1464\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1465\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1466\u001b[0m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1467\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1468\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1469\u001b[0m \u001b[43m    \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1470\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1471\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1472\u001b[0m hf_raise_for_status(r)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:286\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    285\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 286\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    287\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    288\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    289\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    290\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    291\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    293\u001b[0m     \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m    294\u001b[0m     \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:310\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    309\u001b[0m response \u001b[38;5;241m=\u001b[39m http_backoff(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams, retry_on_exceptions\u001b[38;5;241m=\u001b[39m(), retry_on_status_codes\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m429\u001b[39m,))\n\u001b[0;32m--> 310\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_http.py:459\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    450\u001b[0m     message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    451\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    452\u001b[0m         \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    457\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m https://huggingface.co/docs/huggingface_hub/authentication\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    458\u001b[0m     )\n\u001b[0;32m--> 459\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    461\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
      "\u001b[0;31mRepositoryNotFoundError\u001b[0m: 401 Client Error. (Request ID: Root=1-68ab36db-23af745c75e2392f7f83451f;0af94093-6356-4931-a89b-7c1215f2abc4)\n\nRepository Not Found for url: https://huggingface.co/models/whisper-small-asr-int8/resolve/main/adapter_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication\nInvalid username or password.",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[9], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoProcessor\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpeft\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PeftConfig, PeftModel\n\u001b[0;32m----> 4\u001b[0m peft_config \u001b[38;5;241m=\u001b[39m \u001b[43mPeftConfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m base_model \u001b[38;5;241m=\u001b[39m AutoModelForSpeechSeq2Seq\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[1;32m      7\u001b[0m     peft_config\u001b[38;5;241m.\u001b[39mbase_model_name_or_path, load_in_8bit\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, device_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      8\u001b[0m )\n\u001b[1;32m      9\u001b[0m base_model\u001b[38;5;241m.\u001b[39mrequires_grad_(\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/peft/config.py:204\u001b[0m, in \u001b[0;36mPeftConfigMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m    200\u001b[0m         config_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[1;32m    201\u001b[0m             pretrained_model_name_or_path, CONFIG_NAME, subfolder\u001b[38;5;241m=\u001b[39msubfolder, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhf_hub_download_kwargs\n\u001b[1;32m    202\u001b[0m         )\n\u001b[1;32m    203\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 204\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCONFIG_NAME\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m at \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m    206\u001b[0m loaded_attributes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mfrom_json_file(config_file)\n\u001b[1;32m    207\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mclass_kwargs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mloaded_attributes}\n",
      "\u001b[0;31mValueError\u001b[0m: Can't find 'adapter_config.json' at 'models/whisper-small-asr-int8'"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoProcessor\n",
    "from peft import PeftConfig, PeftModel\n",
    "\n",
    "peft_config = PeftConfig.from_pretrained(model_dir)\n",
    "\n",
    "base_model = AutoModelForSpeechSeq2Seq.from_pretrained(\n",
    "    peft_config.base_model_name_or_path, load_in_8bit=True, device_map=\"auto\"\n",
    ")\n",
    "base_model.requires_grad_(False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "994fc816-700d-4f17-94a1-7271ffadad18",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'peft_config' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[43mpeft_config\u001b[49m\u001b[38;5;241m.\u001b[39mbase_model_name_or_path, language\u001b[38;5;241m=\u001b[39mlanguage, task\u001b[38;5;241m=\u001b[39mtask)\n\u001b[1;32m      2\u001b[0m processor \u001b[38;5;241m=\u001b[39m AutoProcessor\u001b[38;5;241m.\u001b[39mfrom_pretrained(peft_config\u001b[38;5;241m.\u001b[39mbase_model_name_or_path, language\u001b[38;5;241m=\u001b[39mlanguage, task\u001b[38;5;241m=\u001b[39mtask)\n\u001b[1;32m      3\u001b[0m feature_extractor \u001b[38;5;241m=\u001b[39m processor\u001b[38;5;241m.\u001b[39mfeature_extractor\n",
      "\u001b[0;31mNameError\u001b[0m: name 'peft_config' is not defined"
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)\n",
    "processor = AutoProcessor.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)\n",
    "feature_extractor = processor.feature_extractor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4faa0e81-ca46-417f-95c0-ea2b08efe99b",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'peft_model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m test_audio \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata/test_zh.flac\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutomaticSpeechRecognitionPipeline\n\u001b[0;32m----> 4\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m AutomaticSpeechRecognitionPipeline(model\u001b[38;5;241m=\u001b[39m\u001b[43mpeft_model\u001b[49m, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer, feature_extractor\u001b[38;5;241m=\u001b[39mfeature_extractor)\n\u001b[1;32m      6\u001b[0m forced_decoder_ids \u001b[38;5;241m=\u001b[39m processor\u001b[38;5;241m.\u001b[39mget_decoder_prompt_ids(language\u001b[38;5;241m=\u001b[39mlanguage_decode, task\u001b[38;5;241m=\u001b[39mtask)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'peft_model' is not defined"
     ]
    }
   ],
   "source": [
    "test_audio = \"data/test_zh.flac\"\n",
    "from transformers import AutomaticSpeechRecognitionPipeline\n",
    "\n",
    "pipeline = AutomaticSpeechRecognitionPipeline(model=peft_model, tokenizer=tokenizer, feature_extractor=feature_extractor)\n",
    "\n",
    "forced_decoder_ids = processor.get_decoder_prompt_ids(language=language_decode, task=task)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
